###Replication data for "Does inequality beget inequality? Experimental tests of the prediction that inequality increases system justification motivation 
### Kris-Stella Trump and Ariel White
### December 2017
### This replication code for the main study presented in body of article
### Data from TESS study 605 (http://www.tessexperiments.org/data/trump605.html)
### Code tested in R version 3.4.1

#Create log file
sink(file="TESS_analysis_code_log.txt")

#Packages
install.packages(c("foreign", "pwr", "weights", "Hmisc", "stargazer", "psych", "SDMTools"))

#Loading data 
#Don't forget to set working directory
require(foreign)
full <- read.spss("TESS3 174 - Trump_Client.sav", to.data.frame=TRUE)

#Look at data
head(full); dim(full)
names(full)


######################
#construct scales and variables

#treatment condition
table(full$TESS174)
full$highgini <- 0
full[substr(full$TESS174, 1, 11) =="Treatment A", "highgini"]<- 1

#set up the SJT scale, reverse-coding items 3 and 7
full$SJT_1 <- as.numeric(full$QA_1)
full$SJT_2 <- as.numeric(full$QA_2)
full$SJT_3 <- 12- as.numeric(full$QA_3) #subtract from 12 to make it compatible with the others (so min is 2)
full$SJT_4 <- as.numeric(full$QA_4)
full$SJT_5 <- as.numeric(full$QA_5)
full$SJT_6 <- as.numeric(full$QA_6)
full$SJT_7 <- 12 - as.numeric(full$QA_7)
full$SJT_8 <- as.numeric(full$QA_8)

full$SJTmean <- rowMeans(full[, c("SJT_1", "SJT_2", "SJT_3", "SJT_4", "SJT_5", "SJT_6", "SJT_7", "SJT_8")]) 
full[full$SJT_1==1 & is.na(full$SJTmean)==F, "SJTmean"] <- NA #one person refusing all Q's whose responses were recorded as "1".

#internal consistency
require(psych)
sjt <- full[c("SJT_1", "SJT_2", "SJT_3", "SJT_4", "SJT_5", "SJT_6", "SJT_7", "SJT_8")]
alpha(sjt) #0.76 

#standardize to 0-1 
full$SJTmean <- full$SJTmean/10
#check distribution and mean
plot(density(full$SJTmean, na.rm=TRUE))
#fair approximation of a normal distribution
summary(full$SJTmean)
sd(full$SJTmean, na.rm=T)
#are variances equal between treatment and control
var.test(full[full$highgini==1, "SJTmean"],full[full$highgini==0, "SJTmean"]) #yes


#take mean of the "confidence in institutions" question
#(note that numerical coding chosen by Gfk is counter-intuitive, 2-4 where 2 is highest confidence and 1 indicates refusal. recoded below to 1-3 where 3 is highest confidence.)
full$conf1 <- as.numeric(full$QB_1)
full$conf2 <- as.numeric(full$QB_2)
full$conf3 <- as.numeric(full$QB_3)
full$conf4 <- as.numeric(full$QB_4)
full$conf5 <- as.numeric(full$QB_5)
full$conf6 <- as.numeric(full$QB_6)

full$confidencemean <- rowMeans(full[, c("conf1","conf2","conf3","conf4","conf5","conf6")])
full[full$confidencemean == 1 & is.na(full$confidencemean)==F,"confidencemean"]<- NA #another refuser.
#make confidence mean score range 1-3 with 3 highest
full$confidencemean <- 5-full$confidencemean
#standardize to 0-1 scale
full$confidencemean <- full$confidencemean/3

#internal consistency
conf <- full[c("conf1","conf2","conf3","conf4","conf5","conf6")]
alpha(conf) #0.74 

#check distribution and mean
plot(density(full$confidencemean, na.rm=TRUE))
#fair approximation of a normal distribution
summary(full$confidencemean)
sd(full$confidencemean, na.rm=T)
#are variances equal between treatment and control
var.test(full[full$highgini==1, "confidencemean"],full[full$highgini==0, "confidencemean"]) #yes


# set up economic SJT scale, reverse-coding items 2,4,6,8,9,12,14 
full$econ1 <- as.numeric(full$QC_1) 
full$econ2 <- 11- as.numeric(full$QC_2)
full$econ3 <- as.numeric(full$QC_3)
full$econ4 <- 11-as.numeric(full$QC_4)
full$econ5 <- as.numeric(full$QC_5)
full$econ6 <- 11-as.numeric(full$QC_6)
full$econ7 <- as.numeric(full$QC_7)
full$econ8 <- 11-as.numeric(full$QC_8)
full$econ9 <- 11-as.numeric(full$QC_9)
full$econ10 <- as.numeric(full$QC_10)
full$econ11 <- as.numeric(full$QC_11)
full$econ12 <- 11-as.numeric(full$QC_12)
full$econ13 <- as.numeric(full$QC_13)
full$econ14 <- 11-as.numeric(full$QC_14)
full$econ15 <- as.numeric(full$QC_15)

full$econmean <- rowMeans(full[, c("econ1","econ2","econ3","econ4","econ5","econ6", "econ7","econ8","econ9","econ10","econ11","econ12","econ13","econ14","econ15")])
#also need to figure out missingness here.
full[full$econ1== 1 & is.na(full$econmean)==F, "econmean"]<- NA #full-on refusers (2 people)
full[full$econ3== 1 & is.na(full$econmean)==F,"econmean"]<- NA #partial refusers (2 people)

#internal consistency
econsjt <- full[c("econ1","econ2","econ3","econ4","econ5","econ6", "econ7","econ8","econ9","econ10","econ11","econ12","econ13","econ14","econ15")]
alpha(econsjt) #0.78 

#standardize 0-1
full$econmean <- full$econmean/10

#check distribution and mean
plot(density(full$econmean, na.rm=TRUE))
#fair approximation of a normal distribution
summary(full$econmean)
sd(full$econmean, na.rm=T)
#are variances equal between treatment and control
var.test(full[full$highgini==1, "econmean"],full[full$highgini==0, "econmean"]) #marginal - p-0.058

#power check
require(pwr)
pwr.t.test(n=170, d=, sig.level=0.05, power=0.8, type="two.sample")
#powered for d=0.3



###############################
#Analysis

#start with t-tests for H1
require(weights)
## SJT scale
t.test(full[full$highgini==1,"SJTmean"],full[full$highgini==0,"SJTmean"])
#high gini treatment decreases scores on system justification scale
#check with regression and weighted t-test also
sjtdata <- full[!is.na(full$SJTmean),]
sjtreg <- lm(formula=SJTmean ~ highgini, data=sjtdata, weights=sjtdata$Weight)
anova(sjtreg)
summary(sjtreg)
wtd.t.test(sjtdata[sjtdata$highgini==1,"SJTmean"],sjtdata[sjtdata$highgini==0,"SJTmean"], weight= sjtdata[sjtdata$highgini==1,"Weight"], weighty= sjtdata[sjtdata$highgini==0,"Weight"], drops="pairwise") 

## confidence in institutions
t.test(full[full$highgini==1,"confidencemean"],full[full$highgini==0,"confidencemean"])
confdata <- full[!is.na(full$confidencemean),]
wtd.t.test(confdata[confdata$highgini==1,"confidencemean"],confdata[confdata$highgini==0,"confidencemean"], weight= confdata[confdata$highgini==1,"Weight"], weighty= confdata[confdata$highgini==0,"Weight"]) 
confreg <- lm(formula=confidencemean ~ highgini, data=confdata, weights=confdata$Weight)
anova(confreg)
summary(confreg)

## economic SJT
t.test(full[full$highgini==1,"econmean"],full[full$highgini==0,"econmean"])
#Since var.test was marginally significant, run this also with var.equal=F
t.test(full[full$highgini==1,"econmean"],full[full$highgini==0,"econmean"], var.equal=F)
#No substantive difference
econdata <- full[!is.na(full$econmean),]
wtd.t.test(econdata[econdata$highgini==1,"econmean"],econdata[econdata$highgini==0,"econmean"], weight= econdata[econdata$highgini==1,"Weight"], weighty= econdata[econdata$highgini==0,"Weight"]) 
econreg <- lm(formula=econmean ~ highgini, data=econdata, weights=econdata$Weight)
anova(econreg)
summary(econreg)


###################
# adding covariates

#recode income as continuous
full$income <- NA
full[substr(full$PPINCIMP,12,16)=="5,000","income"] <- 2500
full[substr(full$PPINCIMP,2,6)=="5,000","income"] <- (5000+7499)/2
full[substr(full$PPINCIMP,2,6)=="7,500","income"] <- (7500+9999)/2
full[substr(full$PPINCIMP,2,7)=="10,000","income"] <- (10000+12499)/2
full[substr(full$PPINCIMP,2,7)=="12,500","income"] <- (12500+14999)/2
full[substr(full$PPINCIMP,2,7)=="15,000","income"] <- (15000+19999)/2
full[substr(full$PPINCIMP,2,7)=="20,000","income"] <- (20000+24999)/2
full[substr(full$PPINCIMP,2,7)=="25,000","income"] <- (25000+29999)/2
full[substr(full$PPINCIMP,2,7)=="30,000","income"] <- (30000+34999)/2
full[substr(full$PPINCIMP,2,7)=="35,000","income"] <- (35000+39999)/2
full[substr(full$PPINCIMP,2,7)=="40,000","income"] <- (40000+49999)/2
full[substr(full$PPINCIMP,2,7)=="50,000","income"] <- (50000+59999)/2
full[substr(full$PPINCIMP,2,7)=="60,000","income"] <- (60000+74999)/2
full[substr(full$PPINCIMP,2,7)=="75,000","income"] <- (75000+84999)/2
full[substr(full$PPINCIMP,2,7)=="85,000","income"] <- (85000+99999)/2
full[substr(full$PPINCIMP,2,8)=="100,000","income"] <- (100000+124999)/2
full[substr(full$PPINCIMP,2,8)=="125,000","income"] <- (125000+149999)/2
full[substr(full$PPINCIMP,2,8)=="150,000","income"] <- (150000+174999)/2
full[substr(full$PPINCIMP,2,8)=="175,000","income"] <- 200000
table(full$income)
summary(full$income)

#creat binary indicator of below median income
full$lowincome <- full$income<median(rep(full$income, times=full$Weight))
summary(full$lowincome)

#create income variable measured in thousands
full$inc.thou <- full$income/1000

#recode ideology as continuous from 1 to 7 where higher number more conservative
summary(full$ideo)
summary(as.numeric(full$ideo))
full$conservative <- as.numeric(full$ideo) - 1
#12 people refused, omit
full[full$conservative==0, "conservative"] <- NA


###Regressions with covariates
SJT1 <- lm(SJTmean ~ highgini, weights= Weight, data=full)
summary(SJT1)
SJT2 <- lm(SJTmean ~ highgini + PPETHM + ppagecat + PPGENDER + PPMARIT + PPINCIMP + ideo, weights= Weight, data=full)
summary(SJT2) #barely moves.
SJT3 <- lm(SJTmean ~ highgini + PPETHM + ppagecat + PPGENDER + PPMARIT + income + conservative, weights= Weight, data=full)
summary(SJT3) #now only marginally significant

conf1 <- lm(confidencemean ~ highgini, weights= Weight, data=full)
summary(conf1)
conf2 <- lm(confidencemean ~ highgini + PPETHM + ppagecat + PPGENDER + PPMARIT + PPINCIMP + ideo, weights= Weight, data=full)
summary(conf2) #still nothing
conf3 <- lm(confidencemean ~ highgini + PPETHM + ppagecat + PPGENDER + PPMARIT + income + conservative, weights= Weight, data=full)
summary(conf3) #still nothing

econ1 <- lm(econmean ~ highgini, weights= Weight, data=full)
summary(econ1) #no impact
econ2 <- lm(econmean ~ highgini + PPETHM + ppagecat + PPGENDER + PPMARIT + PPINCIMP + ideo, weights= Weight, data=full)
summary(econ2) #barely moves
econ3 <- lm(econmean ~ highgini + PPETHM + ppagecat + PPGENDER + PPMARIT + income + conservative, weights= Weight, data=full)
summary(econ3) #now high gini has a marginal negative effect on economic system justification




###############################
#interaction with income - binary indicator of below median hh income
SJT.inc <- lm(SJTmean ~ highgini*lowincome, weights= Weight, data=full)
summary(SJT.inc) #marginally positive interaction with low income

conf.inc <- lm(confidencemean ~ highgini*lowincome, weights= Weight, data=full)
summary(conf.inc)

econ.inc <- lm(econmean ~ highgini*lowincome, weights= Weight, data=full)
summary(econ.inc) #negative interaction with low income, no main effect


###############################
#interaction with income - continuous indicator of income in thousands USD
SJT.inc2 <- lm(SJTmean ~ highgini*inc.thou, weights= Weight, data=full)
summary(SJT.inc) 

conf.inc2 <- lm(confidencemean ~ highgini*inc.thou, weights= Weight, data=full)
summary(conf.inc)

econ.inc2 <- lm(econmean ~ highgini*inc.thou, weights= Weight, data=full)
summary(econ.inc) 



######################
#treatment checks

#Q2 is "Income inequality in the United States has increased dramatically over time."
#do responses differ depending on the graph shown?  
full$Q2true <- NA
full[full$Q2=="Correct", "Q2true"] <- 1
full[full$Q2=="Incorrect", "Q2true"] <- 0
full[full$Q2=="Don't Know", "Q2true"] <- 0

#test
t.test(full[full$highgini==1,"Q2true"],full[full$highgini==0,"Q2true"])
#high-gini treatment people are much more likely to agree with Q2. 
#psych-style version of this:
Q2reg <- lm(formula=Q2true ~ highgini, data=full)
anova(Q2reg)

###
#Q3 is "The share of total income of the very rich has not changed much over time in the United States. "

full$Q3true <- NA
full[full$Q3=="Correct", "Q3true"] <- 1
full[full$Q3=="Incorrect", "Q3true"] <- 0
full[full$Q3=="Don't Know", "Q3true"] <- 0

t.test(full[full$highgini==1,"Q3true"],full[full$highgini==0,"Q3true"])
Q3reg <- lm(formula=Q3true ~ highgini, data=full)
anova(Q3reg)
#And highgini treatment people are less likely to think that total income of the rich has not changed. 



###Tables and Figures

####Figure 1 in main text

##Look up the sd of each of the standardized measures. 
sd(full$SJTmean, na.rm=T)
sd(full$confidencemean, na.rm=T)
sd(full$econmean, na.rm=T)

##full/various outcomes
full_grid <- as.data.frame(cbind(rep(NA,3), rep(NA,3), rep(NA,3), rep(NA,3)))
full_grid$V1 <- c("System \n Justification", "Institutional\n Trust", "Econ. System \n Justification")
colnames(full_grid)<- c("outcome", "diff.est", "conf.low", "conf.high")

full_sjt <- t.test(full[full$highgini==1,"SJTmean"],full[full$highgini==0,"SJTmean"])
full_sjt$conf.int[1]
full_grid[1, 2] <- full_sjt$estimate[1]- full_sjt$estimate[2]
full_grid[1, 3] <- full_sjt$conf.int[1]
full_grid[1, 4] <- full_sjt$conf.int[2]

full_conf<- t.test(full[full$highgini==1,"confidencemean"],full[full$highgini==0,"confidencemean"])
full_conf$conf.int[2]
full_grid[2, 2] <- full_conf$estimate[1]- full_conf$estimate[2]
full_grid[2, 3] <- full_conf$conf.int[1]
full_grid[2, 4] <- full_conf$conf.int[2]

full_econ <- t.test(full[full$highgini==1,"econmean"],full[full$highgini==0,"econmean"])
full_grid[3, 2] <- full_econ$estimate[1]- full_econ$estimate[2]
full_grid[3, 3] <- full_econ$conf.int[1]
full_grid[3, 4] <- full_econ$conf.int[2]
full_grid$ruler <- c(3,2,1)

pdf("Fig 1 - TESS_differenceests_std.pdf")
par(las=1, oma=c(1, 4, 1, 0))
plot(NA,NA, main="Effect of High \nInequality Treatment", ylim=c(.5, 3.5), xlim=c(-0.12,0.12), yaxt='n', 
     xlab= "", ylab="", cex.main=1.3)
points(full_grid$diff.est, full_grid$ruler, pch=18, cex=2)
mtext("Estimated Difference Between \nHigh and Low Inequality Conditions", cex=1.3, side=1, line=4)
for (i in 1:3){
  segments(full_grid$conf.low[i], full_grid$ruler[i], full_grid$conf.high[i], full_grid$ruler[i], lwd="2.2")}
axis(side=2, at=full_grid$ruler, labels = full_grid$outcome, cex.axis=1.3)
abline(v=0, lty=3, col="gray")
dev.off()

####Table 1 in main text
#Interaction analysis using continuous measure of income. (This outputs Latex code)
require(stargazer)
stargazer(SJT.inc2,conf.inc2,econ.inc2, title="Interaction with Income",omit.stat=c("rsq","res.dev"),dep.var.labels=c("System Justification", "Confidence in Institutions", "Economic System Justification"), covariate.labels=c("High Inequality Treatment", "Household Income (thousands USD)", "High Inequality * Income", "Constant"), align=T)

#Descriptive stats in table A1, Supplemental info: 
require(Hmisc)
require(SDMTools)

#Variables (not exclusive list) used in determining weights:
#Age
describe(full$ppagecat, weights=full$Weight)
#Gender
describe(full$PPGENDER, weights=full$Weight)
#Income
incomeprop <- as.data.frame(describe(full$PPINCIMP, weights=full$Weight)$values$value)
incomeprop$proportion<-  describe(full$PPINCIMP, weights=full$Weight)$values$frequency/1020 
sum(incomeprop$proportion[c(1:7)]) #collapse some categories
sum(incomeprop$proportion[c(8:11)])
sum(incomeprop$proportion[c(12:13)])
sum(incomeprop$proportion[c(14:15)])
sum(incomeprop$proportion[c(16:19)])

#Race
describe(full$PPETHM, weights=full$Weight)

#Education
describe(full$PPEDUC, weights=full$Weight)
educprop <- as.data.frame(describe(full$PPEDUC, weights=full$Weight)$values$value)
educprop$proportion<-  describe(full$PPEDUC, weights=full$Weight)$values$frequency/1020 
#collapse some categories
sum(educprop$proportion[c(1:8)]) #less than high school
sum(educprop$proportion[c(9)]) #high school
sum(educprop$proportion[c(10)]) #some college
sum(educprop$proportion[c(11)]) #associate's 
sum(educprop$proportion[c(12)]) #bachelor's
sum(educprop$proportion[c(13)]) #master's
sum(educprop$proportion[c(14)]) #professional

#Variables not used to determine weights:
#Religion
describe(full$rel1, weights=full$Weight)
#Frequency of worship
describe(full$rel2, weights=full$Weight)

#Ideology
describe(full$conservative, weights=full$Weight)
wtd.mean(full$conservative, weights=full$Weight)
sqrt(wtd.var(full$conservative, weights=full$Weight))

#SJ
describe(full$SJTmean, weights=full$Weight)
wtd.mean(full$SJTmean, weights=full$Weight)
sqrt(wtd.var(full$SJTmean, weights=full$Weight))

#Institutional trust
describe(full$confidencemean, weights=full$Weight)
wtd.mean(full$confidencemean, weights=full$Weight)
sqrt(wtd.var(full$confidencemean, weights=full$Weight))
max(full$confidencemean, na.rm=T)
min(full$confidencemean, na.rm=T)

#Econ SJ
describe(full$econmean, weights=full$Weight)
wtd.mean(full$econmean, weights=full$Weight)
sqrt(wtd.var(full$econmean, weights=full$Weight))
max(full$econmean, na.rm=T)
min(full$econmean, na.rm=T)


#Figure A1, Supplemental information
sjtmeans <- c(mean(full[full$lowincome==1 & full$highgini==0, "SJTmean"], na.rm=T), 
    mean(full[full$lowincome==1 & full$highgini==1, "SJTmean"], na.rm=T),
    mean(full[full$lowincome==0 & full$highgini==0, "SJTmean"], na.rm=T), 
    mean(full[full$lowincome==0 & full$highgini==1, "SJTmean"], na.rm=T)
   ) 

lci <- c(
(mean(full[full$lowincome==1 & full$highgini==0, "SJTmean"], na.rm=T) - sd(full[full$lowincome==1 & full$highgini==0, "SJTmean"], na.rm=T)*1.96),
(mean(full[full$lowincome==1 & full$highgini==1, "SJTmean"], na.rm=T) - sd(full[full$lowincome==1 & full$highgini==1, "SJTmean"], na.rm=T)*1.96),
(mean(full[full$lowincome==0 & full$highgini==0, "SJTmean"], na.rm=T) - sd(full[full$lowincome==0 & full$highgini==0, "SJTmean"], na.rm=T)*1.96),
(mean(full[full$lowincome==0 & full$highgini==1, "SJTmean"], na.rm=T) - sd(full[full$lowincome==0 & full$highgini==1, "SJTmean"], na.rm=T)*1.96)
)


uci <- c(
  (mean(full[full$lowincome==1 & full$highgini==0, "SJTmean"], na.rm=T) + sd(full[full$lowincome==1 & full$highgini==0, "SJTmean"], na.rm=T)*1.96),
  (mean(full[full$lowincome==1 & full$highgini==1, "SJTmean"], na.rm=T) + sd(full[full$lowincome==1 & full$highgini==1, "SJTmean"], na.rm=T)*1.96),
  (mean(full[full$lowincome==0 & full$highgini==0, "SJTmean"], na.rm=T) + sd(full[full$lowincome==0 & full$highgini==0, "SJTmean"], na.rm=T)*1.96),
  (mean(full[full$lowincome==0 & full$highgini==1, "SJTmean"], na.rm=T) + sd(full[full$lowincome==0 & full$highgini==1, "SJTmean"], na.rm=T)*1.96)
)
at <- c(3,7)

incresults <- as.data.frame(cbind(sjtmeans, uci, lci))
par(oma=c(0,4,0,0))
mp <- barplot(incresults$sjtmeans,ann=F, ylim=c(0,1), xaxt="n", space=c(0.5,0,0.5,0),col=c("grey40","lightgrey"))


pdf("FigureA1 incomeresults.pdf")
par(oma=c(0,5,0,0), xpd=T)
barplot(incresults$sjtmeans,ann=F, ylim=c(0,1), xaxt="n", space=c(0.5,0,0.5,0),col=c("grey40","lightgrey"), las=1)
axis(1,at=c(0.05,10),lwd.ticks=0, labels=c("",""))
errbar(mp,incresults$mean,incresults$uci,incresults$lci,add=T, pch=NA)
legend(x=2.8, y=1.1, legend=c("Control", "High Inequality"), fill=c("grey40","lightgrey"), bty="n", y.intersp=1.5, cex=1.2)
mtext("System\n justification\n scale (0-1)", side=2, las=1, outer=F, line=3, cex=1.2)
mtext(c("Low income \nrespondents", "High income \nrespondents"), side=1, at=c(1.5,4), line=2, cex=1.2)
dev.off()

#Table A2, Supplemental information
#interaction with binary income indicator
stargazer(SJT.inc,conf.inc,econ.inc, title="Interaction with Income",omit.stat=c("rsq","res.dev"),dep.var.labels=c("System Justification", "Confidence in Institutions", "Economic System Justification"), covariate.labels=c("High Inequality Treatment", "Below Median Household Income", "High Inequality * Low Income", "Constant"), align=T)
